Introduction to Maps and Spatial Data
Introduction to Maps and Spatial Data#
These examples use the GeoPandas package which adds special
functions for DataFrames to work with geographic data.
There are two basic ways to visualize spatial data (i.e., make maps):
plot() and explore(). plot() creates a matplotlib chart of the data; explore() creates an interactive map with the shapes overlaid on open map data.
For the visualizations to work, though, you have to set a relevant index on the data. In our case here, the index is the district number.
After the basic plots, we merge the school district data with the school demographics data in order to create more detailed examples.
Useful links:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import Markdown as md
from nycschools import schools, geo
# load the school district shapes through the nycschools geo helper and
# index them by district number; each value in "geometry" is a district shape
# NOTE: the index need not be unique (district 10 spans two rows in this data)
gdf = geo.load_districts().set_index("district")
gdf
| area | length | geometry | |
|---|---|---|---|
| district | |||
| 32 | 51898496.7618 | 37251.0574964 | MULTIPOLYGON (((-73.91181 40.70343, -73.91290 ... |
| 16 | 46763620.3794 | 35848.9043428 | MULTIPOLYGON (((-73.93312 40.69579, -73.93237 ... |
| 17 | 128440514.645 | 68356.1032412 | MULTIPOLYGON (((-73.92044 40.66563, -73.92061 ... |
| 13 | 104871082.804 | 86649.0984086 | MULTIPOLYGON (((-73.97906 40.70595, -73.97924 ... |
| 25 | 443759165.29 | 176211.272136 | MULTIPOLYGON (((-73.82050 40.80101, -73.82040 ... |
| 29 | 420204870.637 | 135100.006491 | MULTIPOLYGON (((-73.73816 40.72896, -73.73716 ... |
| 10 | 282540516.283 | 94957.3984139 | MULTIPOLYGON (((-73.86790 40.90294, -73.86796 ... |
| 26 | 424302397.116 | 131618.853059 | MULTIPOLYGON (((-73.74461 40.77895, -73.74429 ... |
| 18 | 175191726.482 | 120925.187236 | MULTIPOLYGON (((-73.86327 40.58388, -73.86381 ... |
| 15 | 196154191.734 | 153439.205153 | MULTIPOLYGON (((-73.98633 40.69105, -73.98536 ... |
| 27 | 790725033.478 | 572076.106693 | MULTIPOLYGON (((-73.83193 40.59345, -73.83117 ... |
| 8 | 258762275.331 | 222001.724191 | MULTIPOLYGON (((-73.83979 40.83562, -73.83967 ... |
| 7 | 92258856.3652 | 65300.6110934 | MULTIPOLYGON (((-73.89681 40.79581, -73.89694 ... |
| 9 | 82980717.7665 | 44396.4418588 | MULTIPOLYGON (((-73.93309 40.82823, -73.93307 ... |
| 22 | 385697616.013 | 270256.625384 | MULTIPOLYGON (((-73.91990 40.59960, -73.91996 ... |
| 1 | 35160467.8524 | 28647.0153555 | MULTIPOLYGON (((-73.97177 40.72582, -73.97179 ... |
| 2 | 279512458.191 | 207556.482699 | MULTIPOLYGON (((-74.04388 40.69019, -74.04351 ... |
| 3 | 113412690.658 | 51922.3297145 | MULTIPOLYGON (((-73.95672 40.78660, -73.95716 ... |
| 31 | 1604156793.41 | 430016.498898 | MULTIPOLYGON (((-74.05051 40.56642, -74.05047 ... |
| 14 | 150297861.39 | 96653.7952568 | MULTIPOLYGON (((-73.95440 40.73911, -73.95428 ... |
| 4 | 52620569.229 | 52062.8830951 | MULTIPOLYGON (((-73.92134 40.80085, -73.92114 ... |
| 11 | 393216114.225 | 304563.048828 | MULTIPOLYGON (((-73.78833 40.83467, -73.78931 ... |
| 12 | 69088427.7096 | 48691.7772088 | MULTIPOLYGON (((-73.88284 40.84782, -73.88200 ... |
| 21 | 210183056.106 | 123821.906082 | MULTIPOLYGON (((-73.96185 40.62757, -73.96139 ... |
| 6 | 96346045.8858 | 68850.7104133 | MULTIPOLYGON (((-73.92641 40.87762, -73.92635 ... |
| 24 | 394976460.434 | 127338.082622 | MULTIPOLYGON (((-73.90642 40.73987, -73.90648 ... |
| 28 | 247533101.13 | 115703.38462 | MULTIPOLYGON (((-73.84485 40.73575, -73.84480 ... |
| 19 | 199878572.903 | 173955.448638 | MULTIPOLYGON (((-73.84674 40.60485, -73.84672 ... |
| 23 | 47399296.4018 | 40315.8019548 | MULTIPOLYGON (((-73.92137 40.66549, -73.92143 ... |
| 5 | 52518652.0916 | 44503.2919823 | MULTIPOLYGON (((-73.93516 40.83268, -73.93513 ... |
| 30 | 318184623.011 | 151669.593359 | MULTIPOLYGON (((-73.90647 40.79018, -73.90251 ... |
| 20 | 242738512.599 | 94300.7864855 | MULTIPOLYGON (((-74.02553 40.65148, -74.02491 ... |
| 10 | 3282963.15136 | 7883.37233723 | MULTIPOLYGON (((-73.90893 40.87216, -73.90898 ... |
# static map: draw the district shapes with the tab20b color map
# the x axis is longitude and the y axis is latitude
_ = gdf.plot(cmap="tab20b", figsize=(16, 16))
# interactive map: the non-geometry columns show up
# when you hover over a shape
gdf.explore()
Make this Notebook Trusted to load map: File -> Trust Notebook
# Build `dist_map`: the district shapes joined with district-level demographics.

# load the demographics and keep only the most recent academic year
demo = schools.load_school_demographics()
demo = demo[demo.ay == demo.ay.max()]

# group the data at the district level: enrollment is summed, while each
# percentage is the plain mean of the rows in the district
# NOTE(review): this mean is not weighted by enrollment -- confirm that an
# unweighted average is what the maps should show
aggs = {
    'total_enrollment': 'sum',
    'asian_pct': 'mean',
    'black_pct': 'mean',
    'hispanic_pct': 'mean',
    'white_pct': 'mean',
    'swd_pct': 'mean',
    'ell_pct': 'mean',
    'poverty_pct': 'mean',
}
demo = demo.groupby("district").agg(aggs)

# rescale the percentages to 0-100 with two decimals, keeping them numeric;
# formatting them as strings would make plot()/explore() treat the values
# as categories instead of a continuous color scale
cols = [c for c in demo.columns if c.endswith("pct")]
demo[cols] = (demo[cols] * 100).round(2)

# merge the two dataframes on the district number
# reset_index() returns a new frame, so `gdf` itself is left untouched
dist_map = gdf.reset_index()
dist_map["district"] = pd.to_numeric(
    dist_map["district"], downcast='integer', errors='coerce'
)
dist_map = dist_map.join(demo, on="district", how="inner")
dist_map = dist_map.set_index("district")

# keep the geometry plus the aggregated columns, in the order listed in `aggs`
dist_map = dist_map[["geometry", *aggs]]
dist_map
| geometry | total_enrollment | asian_pct | black_pct | hispanic_pct | white_pct | swd_pct | ell_pct | poverty_pct | |
|---|---|---|---|---|---|---|---|---|---|
| district | |||||||||
| 32 | MULTIPOLYGON (((-73.91181 40.70343, -73.91290 ... | 9831 | 1.97 | 15.87 | 78.73 | 2.61 | 21.46 | 24.09 | 87.66 |
| 16 | MULTIPOLYGON (((-73.93312 40.69579, -73.93237 ... | 7127 | 2.4 | 68.88 | 21.85 | 4.72 | 27.09 | 5.87 | 86.54 |
| 17 | MULTIPOLYGON (((-73.92044 40.66563, -73.92061 ... | 20628 | 3.23 | 72.76 | 17.7 | 3.67 | 21.67 | 10.56 | 83.58 |
| 13 | MULTIPOLYGON (((-73.97906 40.70595, -73.97924 ... | 24564 | 6.45 | 54.65 | 22.49 | 12.66 | 20.81 | 6.3 | 69.2 |
| 25 | MULTIPOLYGON (((-73.82050 40.80101, -73.82040 ... | 38992 | 49.74 | 6.43 | 30.36 | 11.21 | 15.4 | 20.81 | 68.98 |
| 29 | MULTIPOLYGON (((-73.73816 40.72896, -73.73716 ... | 23434 | 12.62 | 64.65 | 15.85 | 1.8 | 17.79 | 8.52 | 73.02 |
| 10 | MULTIPOLYGON (((-73.86790 40.90294, -73.86796 ... | 54253 | 4.86 | 15.74 | 73.14 | 4.53 | 21.93 | 23.35 | 86.15 |
| 10 | MULTIPOLYGON (((-73.90893 40.87216, -73.90898 ... | 54253 | 4.86 | 15.74 | 73.14 | 4.53 | 21.93 | 23.35 | 86.15 |
| 26 | MULTIPOLYGON (((-73.74461 40.77895, -73.74429 ... | 30663 | 54.71 | 9.72 | 16.47 | 15.48 | 15.09 | 9.94 | 50.69 |
| 18 | MULTIPOLYGON (((-73.86327 40.58388, -73.86381 ... | 14725 | 2.04 | 82.66 | 10.14 | 3.55 | 24.35 | 7.6 | 81.25 |
| 15 | MULTIPOLYGON (((-73.98633 40.69105, -73.98536 ... | 29806 | 13.39 | 15.87 | 37.15 | 28.82 | 23.07 | 13.28 | 57.15 |
| 27 | MULTIPOLYGON (((-73.83193 40.59345, -73.83117 ... | 50409 | 19.04 | 24.99 | 40.28 | 10.03 | 19.13 | 11.2 | 76.61 |
| 8 | MULTIPOLYGON (((-73.83979 40.83562, -73.83967 ... | 26180 | 6.12 | 22.74 | 64.51 | 4.93 | 23.38 | 14.31 | 85.25 |
| 7 | MULTIPOLYGON (((-73.89681 40.79581, -73.89694 ... | 16672 | 1.08 | 26.92 | 69.13 | 1.48 | 25.97 | 16.47 | 91.57 |
| 9 | MULTIPOLYGON (((-73.93309 40.82823, -73.93307 ... | 31605 | 1.66 | 27.12 | 68.65 | 1.48 | 23.87 | 22.55 | 91.88 |
| 22 | MULTIPOLYGON (((-73.91990 40.59960, -73.91996 ... | 36602 | 16.19 | 39.49 | 17.66 | 23.84 | 20.65 | 13.46 | 72.34 |
| 1 | MULTIPOLYGON (((-73.97177 40.72582, -73.97179 ... | 10553 | 13.9 | 18.36 | 49.84 | 14.25 | 26.78 | 8.41 | 72.2 |
| 2 | MULTIPOLYGON (((-74.04388 40.69019, -74.04351 ... | 73667 | 17.06 | 16.52 | 36.37 | 24.73 | 20.15 | 8.89 | 56.16 |
| 3 | MULTIPOLYGON (((-73.95672 40.78660, -73.95716 ... | 24943 | 7.11 | 24.73 | 37.02 | 25.68 | 22.51 | 5.99 | 56.87 |
| 31 | MULTIPOLYGON (((-74.05051 40.56642, -74.05047 ... | 59648 | 11.88 | 13.77 | 32.02 | 39.47 | 25.44 | 8.57 | 60.49 |
| 14 | MULTIPOLYGON (((-73.95440 40.73911, -73.95428 ... | 16739 | 4.49 | 21.69 | 58.51 | 12.63 | 23.49 | 11.29 | 75.62 |
| 4 | MULTIPOLYGON (((-73.92134 40.80085, -73.92114 ... | 11754 | 7.01 | 25.49 | 59.13 | 5.18 | 26.83 | 10.96 | 82.63 |
| 11 | MULTIPOLYGON (((-73.78833 40.83467, -73.78931 ... | 40363 | 7.21 | 37.13 | 45.17 | 8.27 | 21.31 | 12.09 | 81.76 |
| 12 | MULTIPOLYGON (((-73.88284 40.84782, -73.88200 ... | 18732 | 2.78 | 24.78 | 69.42 | 1.5 | 23.46 | 20.57 | 92.18 |
| 21 | MULTIPOLYGON (((-73.96185 40.62757, -73.96139 ... | 37486 | 23.48 | 14.11 | 28.81 | 31.24 | 19.2 | 21.33 | 75.89 |
| 6 | MULTIPOLYGON (((-73.92641 40.87762, -73.92635 ... | 19167 | 1.32 | 6.11 | 84.92 | 6.12 | 22.25 | 29.4 | 84.57 |
| 24 | MULTIPOLYGON (((-73.90642 40.73987, -73.90648 ... | 55320 | 16.59 | 2.69 | 66.36 | 12.82 | 17.69 | 25.09 | 77.67 |
| 28 | MULTIPOLYGON (((-73.84485 40.73575, -73.84480 ... | 40144 | 29.39 | 23.61 | 27.1 | 13.1 | 15.82 | 12.22 | 67.96 |
| 19 | MULTIPOLYGON (((-73.84674 40.60485, -73.84672 ... | 20951 | 7.39 | 45.28 | 43.2 | 2.03 | 22.59 | 13.51 | 89.96 |
| 23 | MULTIPOLYGON (((-73.92137 40.66549, -73.92143 ... | 8661 | 1.35 | 70.83 | 23.65 | 1.63 | 26.78 | 5.48 | 90.75 |
| 5 | MULTIPOLYGON (((-73.93516 40.83268, -73.93513 ... | 12223 | 3.91 | 44.55 | 39.61 | 8.7 | 27.47 | 7.54 | 79.07 |
| 30 | MULTIPOLYGON (((-73.90647 40.79018, -73.90251 ... | 40939 | 19.09 | 6.55 | 53.41 | 18.15 | 15.29 | 24.65 | 73.07 |
| 20 | MULTIPOLYGON (((-74.02553 40.65148, -74.02491 ... | 54920 | 47.33 | 1.82 | 26.72 | 22.9 | 16.78 | 27.05 | 79.07 |
# make a plot that uses the poverty percent as the value for the color map
# this is the `column` keyword argument for plot()
fig, ax = plt.subplots(figsize=(16, 16))
sns.set_context('talk')
# don't show the boundary box or x/y ticks
ax.axis('off')
fig.tight_layout()
ax.set_title('School District Poverty Levels', pad=20)


def label(row):
    """Write the row's index value at the centroid of its geometry.

    `row` is one row of `dist_map`; its name is the district number because
    the frame is indexed by district. The text is drawn on the `ax` created
    above.
    """
    xy = row.geometry.centroid.coords[0]
    ax.annotate(str(row.name), xy=xy, ha='center', fontsize=14)


# put the district number in the center of each district
# (a plain loop, because label() is called only for its side effect)
for _, district_row in dist_map.iterrows():
    label(district_row)

# `legend` is a boolean flag; the string "True" only worked because any
# non-empty string is truthy
_ = dist_map.plot(legend=True, ax=ax, column="poverty_pct", cmap="coolwarm")
# if we pass a different column, we can create a different plot;
# here the color map is driven by each district's total enrollment
fig, ax = plt.subplots(figsize=(16, 16))
sns.set_context('talk')
# don't show the boundary box or x/y ticks
ax.axis('off')
fig.tight_layout()
ax.set_title('School District Enrollments Levels (size)', pad=20)
# `legend` is a boolean flag; the string "True" only worked because any
# non-empty string is truthy
dist_map.plot(legend=True, ax=ax, column="total_enrollment", cmap="autumn_r")
plt.show()
# explore() accepts the same `column` keyword as plot()
# this interactive map colors the districts by poverty level;
# hover over a district to see all of its demographic data
dist_map.explore(cmap="coolwarm", column="poverty_pct")
Make this Notebook Trusted to load map: File -> Trust Notebook